import { esClient, TRACE_INDEX } from "../server/elasticsearch";
import type { ElasticSearchTrace } from "../server/tracer/types";
import { reservedTraceMetadataSchema } from "../server/tracer/types.generated";

/**
 * Rewrites the `metadata` object of traces that still carry non-reserved
 * ("extraneous") top-level metadata keys.
 *
 * For every trace matched by the search below, the new metadata consists of:
 *  - the keys accepted by `reservedTraceMetadataSchema`, kept at the top level;
 *  - `custom`: the previous `custom` object merged with every other top-level
 *    key (the moved keys win on name clashes);
 *  - `all_keys`: the previous `all_keys` plus the names of the moved keys,
 *    de-duplicated.
 *
 * Documents are fetched in pages with `search_after` and rewritten through the
 * bulk API with a script that replaces `metadata` wholesale.
 *
 * Bulk failures are logged and never thrown: actions of a request that threw
 * stay queued and are sent again with the next flush.
 *
 * NOTE(review): pages are sorted by `_doc` without a point-in-time while the
 * matched documents are being modified; confirm this cannot skip documents, or
 * re-run the migration until it reports zero hits.
 */
const migrateIndex = async (): Promise<void> => {
  const SEARCH_PAGE_SIZE = 10_000;
  // Each document contributes two bulk entries (the action line and the
  // script payload), so this flushes once 200 documents are queued.
  const BULK_FLUSH_THRESHOLD = 400;

  // Sort values of the last hit of the previous page; undefined on the first
  // request. Left as `any` because its shape is dictated by the client.
  let searchAfter: any;
  let bulkActions: Record<string, unknown>[] = [];

  const client = await esClient();

  // Field paths, relative to `metadata`, whose presence marks a trace as
  // needing migration (each one becomes an `exists` clause below).
  const extraneousMetadataFieldsToCheck = [
    "ANSWER.keyword",
    "Context.keyword",
    "InteractiveQuestions",
    "LANGGRAPH_API_URL.keyword",
    "LANGSMITH_LANGGRAPH_API_VARIANT.keyword",
    "Language.keyword",
    "LeaderBoard.keyword",
    "NO_EVIDENCE.keyword",
    "NO_EXISTING_ANSWER.keyword",
    "NumberOfContentSlides",
    "NumberOfInteractiveSlides",
    "NumberOfSlides",
    "NumberOfSubSections",
    "NumberOfTitles",
    "NumberOfVariants",
    "Outline.keyword",
    "OutputType.keyword",
    "QuizEasy",
    "QuizHard",
    "QuizMedium",
    "RewriteBoth",
    "RewriteContent",
    "RewriteTitle",
    "SlideList.keyword",
    "SlideNumber",
    "SpeakerNotes",
    "ToneOfVoice.keyword",
    "Topic.keyword",
    "WriteFor.keyword",
    "accountId.keyword",
    "accuracy_results",
    "accuracy_results.airport_correct",
    "accuracy_results.destination_correct",
    "accuracy_results.passenger_correct",
    "accuracy_results.pickup_correct",
    "actual_user_id.keyword",
    "age_group.keyword",
    "agentName.keyword",
    "agent_id.keyword",
    "agent_name.keyword",
    "agent_type.keyword",
    "aig_trace_id.keyword",
    "airport_detected",
    "api_url.keyword",
    "approach.keyword",
    "args.keyword",
    "asdsa.keyword",
    "assistant_id.keyword",
    "azure",
    "azure.vm",
    "azure.vm.scaleset",
    "azure.vm.scaleset.name.keyword",
    "azure.vm.sku.keyword",
    "bar.keyword",
    "batch_id.keyword",
    "chat_id.keyword",
    "checkpoint_ns.keyword",
    "chunk_id.keyword",
    "cli",
    "cli.version.keyword",
    "cloud",
    "cloud.platform.keyword",
    "cloud.provider.keyword",
    "cloud.region.keyword",
    "cloud.resource_id.keyword",
    "cluster.keyword",
    "completion_tokens",
    "complexity.keyword",
    "component.keyword",
    "conference.keyword",
    "conference_context.keyword",
    "confidence_score",
    "configuration_id.keyword",
    "container",
    "container.id.keyword",
    "content_type.keyword",
    "context_used.keyword",
    "conversationCount",
    "conversation_length",
    "conversation_type.keyword",
    "country.keyword",
    "created_by.keyword",
    "ctx",
    "ctx.attempt",
    "ctx.attempt.id.keyword",
    "ctx.environment",
    "ctx.environment.id.keyword",
    "ctx.environment.type.keyword",
    "ctx.machine",
    "ctx.machine.name.keyword",
    "ctx.organization",
    "ctx.organization.id.keyword",
    "ctx.organization.name.keyword",
    "ctx.organization.slug.keyword",
    "ctx.project",
    "ctx.project.id.keyword",
    "ctx.project.ref.keyword",
    "ctx.project.title.keyword",
    "ctx.queue",
    "ctx.queue.id.keyword",
    "ctx.queue.name.keyword",
    "ctx.run",
    "ctx.run.id.keyword",
    "ctx.task",
    "ctx.task.exportName.keyword",
    "ctx.task.filePath.keyword",
    "ctx.task.id.keyword",
    "current_product.keyword",
    "current_product_id",
    "custom_data.keyword",
    "dataset.keyword",
    "dataset_item.keyword",
    "dataset_run.keyword",
    "dayOfWeek",
    "demo.keyword",
    "department.keyword",
    "deployment",
    "deployment.environment.keyword",
    "deprecated",
    "deprecated.trace",
    "deprecated.trace.id.keyword",
    "deprecated.trace_id.keyword",
    "doc_name.keyword",
    "documentId.keyword",
    "document_id.keyword",
    "document_ids.keyword",
    "document_names.keyword",
    "duration_ms",
    "env.keyword",
    "environment.keyword",
    "evaluation_type.keyword",
    "evaluator.keyword",
    "eventType.keyword",
    "example.keyword",
    "execution_time",
    "expected_keywords_language.keyword",
    "expected_output",
    "expected_output.airport_found",
    "expected_output.destination_address.keyword",
    "expected_output.passenger_count",
    "expected_output.pickup_address.keyword",
    "extracted_data",
    "extracted_data.airport_found",
    "extracted_data.destination_address.keyword",
    "extracted_data.passenger_count",
    "extracted_data.pickup_address.keyword",
    "extracted_output",
    "extracted_output.airport_found",
    "extracted_output.destination_address.keyword",
    "extracted_output.passenger_count",
    "extracted_output.pickup_address.keyword",
    "extraction_success",
    "extraction_type.keyword",
    "fields_extracted",
    "finalInstruction.keyword",
    "floraParams",
    "floraParams.nextBestQuestionThreshold",
    "floraParams.perfectMatchThreshold",
    "floraParams.provideToLLMThreshold",
    "foo.keyword",
    "framework.keyword",
    "from_studio",
    "function.keyword",
    "function_answer_0.keyword",
    "function_ask_0.keyword",
    "gender.keyword",
    "generated_query.keyword",
    "git",
    "git.COMMIT_AUTHOR_LOGIN.keyword",
    "git.COMMIT_AUTHOR_NAME.keyword",
    "git.COMMIT_MESSAGE.keyword",
    "git.COMMIT_REF.keyword",
    "git.COMMIT_SHA.keyword",
    "git.PREVIOUS_SHA.keyword",
    "git.PROVIDER.keyword",
    "git.PULL_REQUEST_ID.keyword",
    "git.REPO_ID.keyword",
    "git.REPO_OWNER.keyword",
    "git.REPO_SLUG.keyword",
    "goalCount",
    "goals.keyword",
    "graph_id.keyword",
    "ground_truth.keyword",
    "guardrail_category.keyword",
    "guardrail_rejection",
    "guardrails_enabled",
    "hasPersonalityData",
    "hasProjectContext",
    "hello.keyword",
    "host",
    "host.arch.keyword",
    "host.id.keyword",
    "host.name.keyword",
    "host.type.keyword",
    "hostname.keyword",
    "hour",
    "image_url.keyword",
    "industry.keyword",
    "input.keyword",
    "input_language.keyword",
    "input_length",
    "input_message.keyword",
    "input_text.keyword",
    "intent.keyword",
    "intent_validator.keyword",
    "interaction_number",
    "interests.keyword",
    "is_monument",
    "jimmiy.keyword",
    "journalCount",
    "keywords.keyword",
    "kwargs.keyword",
    "label.keyword",
    "langgraph_api_url.keyword",
    "langgraph_api_version.keyword",
    "langgraph_auth_user_id.keyword",
    "langgraph_checkpoint_ns.keyword",
    "langgraph_host.keyword",
    "langgraph_node.keyword",
    "langgraph_path.keyword",
    "langgraph_plan.keyword",
    "langgraph_request_id.keyword",
    "langgraph_step",
    "langgraph_triggers.keyword",
    "langgraph_version.keyword",
    "language.keyword",
    "languageCode.keyword",
    "language_iso_code.keyword",
    "langwatch",
    "langwatch.sdk",
    "langwatch.sdk.language.keyword",
    "langwatch.sdk.name.keyword",
    "langwatch.sdk.version.keyword",
    "lc_hub_commit_hash.keyword",
    "lc_hub_owner.keyword",
    "lc_hub_repo.keyword",
    "llm_call_path.keyword",
    "llm_model.keyword",
    "loop",
    "loop.index.keyword",
    "ls_embedding_provider.keyword",
    "ls_max_tokens",
    "ls_model_name.keyword",
    "ls_model_type.keyword",
    "ls_provider.keyword",
    "ls_retriever_name.keyword",
    "ls_temperature",
    "ls_vector_store_provider.keyword",
    "max_context_docs",
    "max_tokens",
    "mcp_tool_call_metadata.keyword",
    "memory_type.keyword",
    "messageId.keyword",
    "messageType.keyword",
    "message_id.keyword",
    "message_length",
    "message_type.keyword",
    "metadata",
    "metadata.foo.keyword",
    "model.keyword",
    "modelUsed.keyword",
    "model_id.keyword",
    "model_name.keyword",
    "module_class.keyword",
    "monument_type.keyword",
    "multimodal",
    "municipality.keyword",
    "name.keyword",
    "new1.keyword",
    "new_total",
    "next",
    "next.public_url.keyword",
    "node",
    "node.env.keyword",
    "numPlots",
    "numThemes",
    "numberOfTitles",
    "operation.keyword",
    "orchestratorVersion.keyword",
    "orgId.keyword",
    "organizationId.keyword",
    "original_filename.keyword",
    "os",
    "os.description.keyword",
    "os.type.keyword",
    "os.version.keyword",
    "outputType.keyword",
    "output_length",
    "overall_correct",
    "perplexity",
    "platform.keyword",
    "primaryGenre.keyword",
    "process",
    "process.command.keyword",
    "process.command_line.keyword",
    "process.executable",
    "process.executable.name.keyword",
    "process.executable.path.keyword",
    "process.owner.keyword",
    "process.runtime",
    "process.runtime.description.keyword",
    "process.runtime.name.keyword",
    "process.runtime.version.keyword",
    "product_ids.keyword",
    "project.keyword",
    "projectId.keyword",
    "prompt.keyword",
    "prompt_id.keyword",
    "prompt_management_metadata.keyword",
    "prompt_template.keyword",
    "prompt_tokens",
    "province.keyword",
    "query.keyword",
    "query_type.keyword",
    "question_id.keyword",
    "reasoning.keyword",
    "relevance_grade.keyword",
    "requester_custom_headers.keyword",
    "requester_ip_address.keyword",
    "requester_metadata",
    "requires_tools",
    "researchGoal.keyword",
    "revision_id.keyword",
    "role.keyword",
    "routing_decision.keyword",
    "run_attempt",
    "run_id.keyword",
    "scratchpad.keyword",
    "sdk",
    "sdk.language.keyword",
    "sdk.version.keyword",
    "secondaryGenres.keyword",
    "seesionId.keyword",
    "sender.keyword",
    "service",
    "service.instance",
    "service.instance.id.keyword",
    "service.name.keyword",
    "service.version.keyword",
    "serviceName.keyword",
    "servicename.keyword",
    "session_id.keyword",
    "session_origin.keyword",
    "source_id.keyword",
    "spend_logs_metadata.keyword",
    "status.keyword",
    "step.keyword",
    "strategies_detected",
    "stream",
    "tags.keyword",
    "task_id.keyword",
    "telemetry",
    "telemetry.distro",
    "telemetry.distro.name.keyword",
    "telemetry.distro.version.keyword",
    "telemetry.sdk",
    "telemetry.sdk.language.keyword",
    "telemetry.sdk.name.keyword",
    "telemetry.sdk.version.keyword",
    "temperature",
    "tenant_id.keyword",
    "test.keyword",
    "test_case_index",
    "test_name.keyword",
    "test_status.keyword",
    "test_type.keyword",
    "text.keyword",
    "theme.keyword",
    "thought.keyword",
    "timeout",
    "timestamp",
    "toneOfVoice.keyword",
    "tool_call_count",
    "tool_calls.keyword",
    "tools_called",
    "top_k",
    "topic.keyword",
    "topics.keyword",
    "total_calls",
    "total_products",
    "trace_ID.keyword",
    "trace_id.keyword",
    "url_org.keyword",
    "url_user.keyword",
    "usage_object.keyword",
    "user-agent.keyword",
    "userGuidance.keyword",
    "userId.keyword",
    "user_api_key_alias.keyword",
    "user_api_key_end_user_id.keyword",
    "user_api_key_hash.keyword",
    "user_api_key_org_id.keyword",
    "user_api_key_team_alias.keyword",
    "user_api_key_team_id.keyword",
    "user_api_key_user_email.keyword",
    "user_api_key_user_id.keyword",
    "user_cpf.keyword",
    "user_email.keyword",
    "user_input.keyword",
    "user_name.keyword",
    "user_query.keyword",
    "vector_store_request_metadata.keyword",
    "venue.keyword",
    "vercel",
    "vercel.branch_host.keyword",
    "vercel.deployment_id.keyword",
    "vercel.host.keyword",
    "vercel.region.keyword",
    "vercel.runtime.keyword",
    "vercel.sha.keyword",
    "worker",
    "worker.id.keyword",
    "worker.version.keyword",
    "workflow_engine.keyword",
    "workspaceId.keyword",
    "writeFor.keyword",
    "x-auth-scheme.keyword",
    "x-request-id.keyword",
    "x-user-id.keyword",
  ];

  // Keys that must stay where they are: everything the reserved schema
  // declares, plus the two containers this migration writes into.
  const reservedFields = new Set(
    Object.keys(reservedTraceMetadataSchema.shape).concat([
      "custom",
      "all_keys",
    ]),
  );

  // One `exists` clause per candidate field; a trace matches when it has at
  // least one of them.
  const existsClauses = extraneousMetadataFieldsToCheck
    .filter((field) => !reservedFields.has(field))
    .map((field) => ({ exists: { field: `metadata.${field}` } }));

  /**
   * Sends the queued bulk actions, if any. The queue is emptied only when the
   * request itself succeeds; when it throws, the actions stay queued so the
   * next flush sends them again. Per-item failures reported in the response
   * are logged.
   */
  const flushBulkActions = async (): Promise<void> => {
    if (bulkActions.length === 0) return;

    try {
      const bulkResponse = await client.bulk({ body: bulkActions });
      bulkActions = [];

      // The bulk API reports per-document failures in the response body
      // instead of rejecting the request.
      if (bulkResponse.errors) {
        for (const item of bulkResponse.items) {
          if (item.update?.error) {
            console.error("Bulk update item failed:", item.update);
          }
        }
      }
    } catch (error) {
      console.error("Error in bulk update:", error);
    }
  };

  let fetchedCount = 0;
  do {
    const initialTime = performance.now();
    const response = await client.search<ElasticSearchTrace>({
      index: TRACE_INDEX.alias,
      _source: {
        includes: ["trace_id", "metadata.*"],
      },
      body: {
        query: {
          bool: {
            should: existsClauses,
            minimum_should_match: 1,
          },
        },
        size: SEARCH_PAGE_SIZE,
        sort: ["_doc"],
        ...(searchAfter ? { search_after: searchAfter } : {}),
      },
    });

    const results = response.hits.hits;
    fetchedCount = results.length;
    searchAfter = results[results.length - 1]?.sort;

    // `hits.total` may be a plain number or an object carrying `value`.
    const total = response.hits.total;
    const totalHits = typeof total === "number" ? total : total?.value;
    process.stdout.write(
      `\nFetched ${results.length} more hits from ${totalHits} total\n`,
    );

    for (const [i, hit] of results.entries()) {
      const metadata = hit._source?.metadata ?? {};
      const allKeys = metadata.all_keys ?? [];
      const custom = metadata.custom ?? {};
      // When validation fails there is no parsed data, so every key other
      // than `custom` / `all_keys` is treated as extraneous below.
      const validatedReservedMetadata =
        reservedTraceMetadataSchema.safeParse(metadata).data ?? {};
      const extraneousMetadata = Object.fromEntries(
        Object.entries(metadata).filter(
          ([key]) =>
            key !== "all_keys" &&
            key !== "custom" &&
            !(key in validatedReservedMetadata),
        ),
      );
      const newMetadata = {
        ...validatedReservedMetadata,
        custom: {
          ...custom,
          ...extraneousMetadata,
        },
        all_keys: Array.from(
          new Set([...allKeys, ...Object.keys(extraneousMetadata)]),
        ),
      };

      // A scripted update replaces `metadata` wholesale, which also removes
      // the keys that were moved under `custom`.
      bulkActions.push(
        {
          update: {
            _index: TRACE_INDEX.alias,
            _id: hit._id,
          },
        },
        {
          script: {
            source: `ctx._source.metadata = params.newMetadata;`,
            lang: "painless",
            params: {
              newMetadata: newMetadata,
            },
          },
          _source: false,
        },
      );

      process.stdout.write(`\r${i + 1}/${results.length} being updated`);

      if (bulkActions.length >= BULK_FLUSH_THRESHOLD) {
        await flushBulkActions();
      }
    }

    // Send whatever is left from this page; the helper empties the queue on
    // success so the next page does not send the same actions again.
    await flushBulkActions();

    console.log(`\nTook: ${(performance.now() - initialTime) / 1000}s`);
  } while (fetchedCount > 0);
};

/**
 * Entry point of this migration: prints a banner line and then awaits
 * `migrateIndex`, resolving once it has finished.
 */
export default async function execute(): Promise<void> {
  const banner = "\nMigrating Traces";
  console.log(banner);

  await migrateIndex();
}
